Package loading

source("../src/functions.R")
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## Loading required package: SummarizedExperiment
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     Filter, Find, Map, Position, Reduce, anyDuplicated, append,
##     as.data.frame, basename, cbind, colnames, dirname, do.call,
##     duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
##     lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
##     pmin.int, rank, rbind, rownames, sapply, setdiff, sort, table,
##     tapply, union, unique, unsplit, which, which.max, which.min
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:purrr':
## 
##     reduce
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: DelayedArray
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
## 
##     colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following object is masked from 'package:purrr':
## 
##     simplify
## The following objects are masked from 'package:base':
## 
##     aperm, apply, rowsum
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:IRanges':
## 
##     slice
## The following object is masked from 'package:S4Vectors':
## 
##     rename
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
## 
## Attaching package: 'BiocStyle'
## The following objects are masked from 'package:rmarkdown':
## 
##     html_document, md_document, pdf_document
## The following object is masked from 'package:shiny':
## 
##     markdown

Data loading

count <- read.table("../data/count.txt.gz", row.names=1, header=TRUE, sep="\t")
expdesign <- read.table("../data/expdesign.txt.gz", row.names=1, header=TRUE, sep="\t", skip=6, stringsAsFactors = FALSE)

Wide -> Long

# Merge
# Filtering non single-cells / zero count cells
count %>%
    rownames_to_column("gene") %>%
        pivot_longer(-gene, names_to="cell", values_to="exp") %>%
            mutate(cell = str_replace(cell, "^X", "")) %>%
                inner_join(expdesign,
                    by=c("cell"="Column_name_in_processed_data_file")) %>%
                    rename_all(tolower) %>%
                        group_by(cell) %>%
                            mutate(sum=sum(exp)) %>%
                                    filter(sum != 0 &&
                                        number_of_cells == 1 &&
                                        group_name %in%
                                            c("B cell", "CD8+pDC", "monocyte_or_neutrophil", "NK_cell")) %>%
                                            ungroup %>%
                                                arrange(cell) -> marsdata

Long -> Wide

# Extract gene expression part
marsdata %>%
    select(gene, cell, exp) %>%
        pivot_wider(names_from="cell", values_from="exp") -> wide_marsdata
# Gene name
wide_marsdata %>%
    select(gene) %>%
        data.frame %>%
            .[,1] -> genename
# Expression matrix -> SingleCellExperiment
wide_marsdata %>%
    select(!gene) %>%
        as.matrix %>%
            SingleCellExperiment(assays=list(counts=.[])) -> sce_marsdata
# rownames
genename -> rownames(sce_marsdata)
# coldata
marsdata %>%
    select(!c(gene, exp)) %>%
        distinct %>%
            arrange(cell) %>%
                DataFrame -> colData(sce_marsdata)

scater

# Analysis workflow of scater
sce_marsdata %>%
    logNormCounts %>%
        runPCA(ntop=2000, ncomponents=10) %>%
            runTSNE(dimred = "PCA") %>%
                runUMAP(dimred = "PCA") -> sce_marsdata

Visualization

Standard plot functions of R

pairs(reducedDim(sce_marsdata, "PCA"),
    col=factor(colData(sce_marsdata)$group_name),
    pch=16, cex=0.5, main="PCA")

plot(reducedDim(sce_marsdata, "TSNE"),
    col=factor(colData(sce_marsdata)$group_name),
    xlab="tSNE1", ylab="tSNE2",
    pch=16, cex=2, main="tSNE")

# Pair/scatter plot (scater)

sce_marsdata %>%
    plotReducedDim(dimred="PCA", colour_by="group_name", ncomponents=10)

sce_marsdata %>%
    plotReducedDim(dimred="TSNE", colour_by="group_name")

sce_marsdata %>%
    plotReducedDim(dimred="UMAP", colour_by="group_name")

ggplot2

reducedDim(sce_marsdata, "TSNE") %>%
    cbind(colData(sce_marsdata)$group_name) %>%
        data.frame %>%
            mutate_at(vars(-X3), as.numeric) %>%
            ggplot(aes(x=X1, y=X2, color=X3)) +
                geom_point() +
                    xlab("PC1") +
                        ylab("PC2") +
                            theme(legend.title = element_blank())

ggpairs

reducedDim(sce_marsdata, "PCA") %>%
    cbind(colData(sce_marsdata)$group_name) %>%
        data.frame %>%
            mutate_at(vars(-V11), as.numeric) %>%
            ggpairs(columns=1:10, aes(color=V11))

pairsD3

reducedDim(sce_marsdata, "PCA") %>%
    .[,1:5] %>%
        pairsD3(group=colData(sce_marsdata)$group_name,
            tooltip=colData(sce_marsdata)$cell)

Plotly

reducedDim(sce_marsdata) %>%
    as_tibble %>%
        select(PC1, PC2, PC3) %>%
            data.frame %>%
                plot_ly(x=~PC1, y=~PC2, z=~PC3,
                type = "scatter3d", mode = "markers",
                text = colData(sce_marsdata)$cell,
                color =~colData(sce_marsdata)$group_name
                )
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

iSEE

# app <- iSEE(sce_marsdata)
# runApp(app)

Session info

## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] C
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] knitr_1.30                  BiocStyle_2.16.1           
##  [3] rmarkdown_2.5               pairsD3_0.1.0              
##  [5] shiny_1.5.0                 iSEE_2.0.0                 
##  [7] plotly_4.9.2.1              GGally_2.0.0               
##  [9] scater_1.16.2               SingleCellExperiment_1.10.1
## [11] SummarizedExperiment_1.18.2 DelayedArray_0.14.1        
## [13] matrixStats_0.57.0          Biobase_2.48.0             
## [15] GenomicRanges_1.40.0        GenomeInfoDb_1.24.2        
## [17] IRanges_2.22.2              S4Vectors_0.26.1           
## [19] BiocGenerics_0.34.0         forcats_0.5.0              
## [21] stringr_1.4.0               dplyr_1.0.2                
## [23] purrr_0.3.4                 readr_1.4.0                
## [25] tidyr_1.1.2                 tibble_3.0.4               
## [27] ggplot2_3.3.2               tidyverse_1.3.0            
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1              backports_1.1.10         
##   [3] circlize_0.4.10           plyr_1.8.6               
##   [5] igraph_1.2.6              lazyeval_0.2.2           
##   [7] shinydashboard_0.7.1      splines_4.0.2            
##   [9] crosstalk_1.1.0.1         BiocParallel_1.22.0      
##  [11] digest_0.6.26             htmltools_0.5.0          
##  [13] viridis_0.5.1             fansi_0.4.1              
##  [15] magrittr_1.5              cluster_2.1.0            
##  [17] ComplexHeatmap_2.4.3      modelr_0.1.8             
##  [19] colorspace_1.4-1          blob_1.2.1               
##  [21] rvest_0.3.6               haven_2.3.1              
##  [23] xfun_0.18                 crayon_1.3.4             
##  [25] RCurl_1.98-1.2            jsonlite_1.7.1           
##  [27] glue_1.4.2                gtable_0.3.0             
##  [29] zlibbioc_1.34.0           XVector_0.28.0           
##  [31] GetoptLong_1.0.4          BiocSingular_1.4.0       
##  [33] shape_1.4.5               scales_1.1.1             
##  [35] DBI_1.1.0                 miniUI_0.1.1.1           
##  [37] Rcpp_1.0.5                viridisLite_0.3.0        
##  [39] xtable_1.8-4              clue_0.3-57              
##  [41] rsvd_1.0.3                DT_0.16                  
##  [43] htmlwidgets_1.5.2         httr_1.4.2               
##  [45] FNN_1.1.3                 RColorBrewer_1.1-2       
##  [47] shinyAce_0.4.1            ellipsis_0.3.1           
##  [49] farver_2.0.3              pkgconfig_2.0.3          
##  [51] reshape_0.8.8             uwot_0.1.8               
##  [53] dbplyr_1.4.4              labeling_0.4.2           
##  [55] tidyselect_1.1.0          rlang_0.4.8              
##  [57] later_1.1.0.1             munsell_0.5.0            
##  [59] cellranger_1.1.0          tools_4.0.2              
##  [61] cli_2.1.0                 generics_0.0.2           
##  [63] rintrojs_0.2.2            broom_0.7.2              
##  [65] evaluate_0.14             fastmap_1.0.1            
##  [67] yaml_2.2.1                fs_1.5.0                 
##  [69] nlme_3.1-149              mime_0.9                 
##  [71] xml2_1.3.2                compiler_4.0.2           
##  [73] rstudioapi_0.11           beeswarm_0.2.3           
##  [75] png_0.1-7                 reprex_0.3.0             
##  [77] stringi_1.5.3             RSpectra_0.16-0          
##  [79] lattice_0.20-41           Matrix_1.2-18            
##  [81] shinyjs_2.0.0             vctrs_0.3.4              
##  [83] pillar_1.4.6              lifecycle_0.2.0          
##  [85] BiocManager_1.30.10       GlobalOptions_0.1.2      
##  [87] BiocNeighbors_1.6.0       cowplot_1.1.0            
##  [89] data.table_1.13.2         bitops_1.0-6             
##  [91] irlba_2.3.3               httpuv_1.5.4             
##  [93] R6_2.4.1                  promises_1.1.1           
##  [95] gridExtra_2.3             vipor_0.4.5              
##  [97] colourpicker_1.1.0        assertthat_0.2.1         
##  [99] rjson_0.2.20              shinyWidgets_0.5.4       
## [101] withr_2.3.0               GenomeInfoDbData_1.2.3   
## [103] mgcv_1.8-33               hms_0.5.3                
## [105] grid_4.0.2                DelayedMatrixStats_1.10.1
## [107] Rtsne_0.15                lubridate_1.7.9          
## [109] ggbeeswarm_0.6.0